package com.zzl.spark.parquet;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SaveMode;

/**
 * Spark SQL example driver: loads a Parquet data set, prints its rows and
 * schema, registers it as the temporary view {@code t1}, selects the rows
 * whose {@code name} column equals {@code 'Alyssa'} and appends the result
 * as JSON to an output location.
 *
 * <p>Usage: {@code ParquetLoadData [inputParquetPath [outputJsonPath]]}.
 * Both arguments are optional; when omitted the built-in default paths are used.
 */
public class ParquetLoadData {

    /** Parquet input read when no first command-line argument is given. */
    private static final String DEFAULT_INPUT_PATH = "hdfs://zhangzeli-node1:9000/input/users.parquet";

    /** JSON output location used when no second command-line argument is given. */
    private static final String DEFAULT_OUTPUT_PATH = "hdfs://zhangzeli-node1:9000/input/result.json";

    /** System property naming the local Hadoop installation directory. */
    private static final String HADOOP_HOME_PROPERTY = "hadoop.home.dir";

    /** Fallback Hadoop installation directory applied when the property is not already set. */
    private static final String DEFAULT_HADOOP_HOME = "E:\\hadoop";

    /**
     * Runs the example job.
     *
     * @param args optional overrides: {@code args[0]} is the Parquet input path,
     *             {@code args[1]} is the JSON output path
     */
    public static void main(String[] args) {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        // Must happen BEFORE the Spark context is created: Hadoop resolves its home
        // directory when its shell utilities are first loaded, so setting the property
        // afterwards is too late. A value supplied by the launcher (-Dhadoop.home.dir=...)
        // is left untouched.
        if (System.getProperty(HADOOP_HOME_PROPERTY) == null) {
            System.setProperty(HADOOP_HOME_PROPERTY, DEFAULT_HADOOP_HOME);
        }

        SparkConf conf = new SparkConf().setMaster("local").setAppName("ParquetLoadData");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            SQLContext sqlContext = new SQLContext(sc);

            Dataset<Row> parquet = sqlContext.read().format("parquet").load(inputPath);
            parquet.show();
            parquet.printSchema();

            // Expose the data set to SQL under the view name "t1".
            parquet.createOrReplaceTempView("t1");
            Dataset<Row> sql = sqlContext.sql("select * from t1 where name ='Alyssa'");
            sql.show();

            // Append mode: repeated runs add to existing output instead of failing.
            sql.write().mode(SaveMode.Append).json(outputPath);
        } finally {
            // Always release the Spark context, even if reading, querying or writing fails.
            sc.stop();
        }
    }
}
